import re
import requests
from multiprocessing.dummy import Pool
import csv

def web_html(url):
    """Fetch *url* and return the response body decoded as UTF-8 text.

    A desktop-browser User-Agent is sent because Tieba serves different
    (or blocked) content to the default ``requests`` UA.

    Raises requests.HTTPError on a non-2xx status and
    requests.Timeout if the server stalls.
    """
    headers = {"user-agent": 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}
    # timeout= prevents the scraper from hanging forever on a dead connection
    response = requests.get(url, headers=headers, timeout=10)
    # fail loudly on HTTP errors instead of regex-parsing an error page
    response.raise_for_status()
    return response.content.decode("utf8")

def web_con(url):
    """Parse one page of a Tieba thread into a list of formatted post strings.

    Each entry carries the poster name, post time, and post content with
    inline HTML tags stripped.

    Returns a list of str, one per post found on the page.
    """
    page_html = web_html(url)
    names = re.findall('<a data-field=.*?>(.*?)</a>', page_html, re.S)
    contents = re.findall('<div id="post_content_.*?>   (.*?)</div>', page_html, re.S)
    times = re.findall('楼</span><span class="tail-info">(.*?)</span></div>', page_html, re.S)
    # compiled once instead of re-parsing '<.*?>' on every post
    strip_tags = re.compile('<.*?>')
    con_list = []
    # zip() stops at the shortest list, so a page where one pattern matched
    # fewer items no longer raises IndexError (the original indexed time[i]
    # while looping over len(name)).
    for name, content, post_time in zip(names, contents, times):
        con_list.append(
            "\n" + "发帖人：" + strip_tags.sub('', name)
            + "\n发帖时间：" + post_time + "\t"
            + "\n" + "发帖内容：" + strip_tags.sub('', content.replace('      ', ''))
            + "\n"
        )
    return con_list

def get_csv(lines):
    """Write the formatted post strings to 爬取.csv in the working directory.

    Despite the name, the output is plain text, not CSV — each entry is
    written verbatim (entries already carry their own newlines).

    The parameter was renamed from ``list`` (which shadowed the builtin);
    the only caller passes it positionally, so this is backward compatible.
    """
    with open('爬取.csv', "w", encoding="utf8") as f:
        # writelines() replaces the original index loop of per-item writes
        f.writelines(lines)

def page_num(url):
    """Return the thread's total page count, as a string, scraped from *url*.

    The count is taken from the first ``<span class="red">…</span>`` on the
    page. Raises ValueError with a clear message when that marker is absent
    (the original raised an opaque IndexError on ``tmp[0]``).
    """
    html = web_html(url)
    matches = re.findall('<span class="red">(.*?)</span>', html)
    if not matches:
        raise ValueError("page count marker not found in: " + url)
    return matches[0]

def web(url):
    """Scrape every page of the Tieba thread at *url* and dump all posts to 爬取.csv.

    Reads the total page count, builds one ``?pn=N`` URL per page, fetches
    the pages concurrently with a small thread pool, flattens the per-page
    post lists, and writes the result via get_csv().
    """
    all_pages = int(page_num(url))
    url_list = [url + "?pn=" + str(n) for n in range(1, all_pages + 1)]
    # multiprocessing.dummy.Pool is a *thread* pool — appropriate for
    # I/O-bound page fetches.
    pool = Pool(5)
    try:
        per_page = pool.map(web_con, url_list)
    finally:
        # the original never closed the pool, leaking its worker threads
        pool.close()
        pool.join()
    # flatten the list-of-lists into one flat list of post strings
    tem_list = [post for page in per_page for post in page]
    get_csv(tem_list)

if __name__ == '__main__':
    # Entry point: scrape a single Tieba thread by its URL and write the
    # posts to 爬取.csv in the current working directory.
    web("https://tieba.baidu.com/p/7255252097")